# Kalmia analyze fruit size at end of season
# Using fruit sizes calculated from image segmentation in Python with opencv
# Callin Switzer
# 20 October 2016
# 10 Feb 2017 Update: Conducted Statistical Modeling with LMER and GLMER
# 11 March 2017 Update: added random effect to MER models to account for plant lineage
Read in data
ipak <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
packages <- c("ggplot2", 'lme4', 'plyr', 'influence.ME', 'sjPlot', 'multcomp', 'lsmeans', 'Matrix')
ipak(packages)
## ggplot2 lme4 plyr influence.ME sjPlot
## TRUE TRUE TRUE TRUE TRUE
## multcomp lsmeans Matrix
## TRUE TRUE TRUE
setwd("/Users/callinswitzer/Dropbox/ExperSummer2016/Kalmia/Manuscript/DatasetsSupplemental/")
theme_set(theme_classic())
kfrt <- read.csv("kalmiaFruitFinal.csv", stringsAsFactors = FALSE)
nrow(kfrt[kfrt$dia_mm == 20.0,]) # number of images taken
## [1] 92
# clean and process data
kfrt <- kfrt[kfrt$dia_mm != 20.0, ] # 20 is the reference object in segmented images
nrow(kfrt) # number of total fruits measured
## [1] 1305
# label treatmens and access numbers
kfrt$trt <- sapply(X = 1:nrow(kfrt), FUN = function(x) strsplit(kfrt$plantNum[x], split = "__")[[1]][2])
kfrt$accessNum <- sapply(X = 1:nrow(kfrt), FUN = function(x) strsplit(kfrt$plantNum[x], split = "__")[[1]][1])
kfrt$trt <- mapvalues(kfrt$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged",
"Unbagged & Outcrossed", "Unbagged_2"))
# add plant lineage (all plants that start with 1129 are from the lineage, 673_69 )
plantInds <- 1:nrow(kfrt) %in% grep(pattern = "1129", x = kfrt$plantNum)
kfrt$plantLineage <- sapply(1:length(plantInds), FUN = function(x){
ifelse(plantInds[x], "673_69", paste(strsplit(kfrt$plantNum[x], split = "_")[[1]][1:2], collapse = "_") )})
Add plants with 0 count to the dataset
# count number of fruits
counts = as.data.frame(table(kfrt$plantNum))
counts$trt = sapply(X = 1:nrow(counts), FUN = function(x) strsplit(as.character(counts$Var1[x]), split = "__")[[1]][2])
counts$trt <- mapvalues(counts$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged",
"Unbagged & Outcrossed", "Unbagged_2"))
counts$accessNum = sapply(X = 1:nrow(counts), FUN = function(x) strsplit(as.character(counts$Var1[x]), split = "__")[[1]][1])
# add in the trts and accession numbers that had counts of 0
# read in error-checked datasheet
kalNotes <- read.csv("KalmiaDailyDatasheets_ErrorChecked.csv")
# get the accession numbers I should have
accNumsHave <- unique(kalNotes$Plant.Number)
accNumsHave <- accNumsHave[!(accNumsHave %in% c("", "Plant Number"))]
#change formatting to match above
accNumsHave <- gsub("#", "", accNumsHave)
accNumsHave <- gsub("\ ", "_", accNumsHave)
accNumsHave <- gsub("\\-", "_", accNumsHave)
accNumsHave <- gsub("\\*", "_", accNumsHave)
accNumsHave <-toupper(accNumsHave)
shouldHave <- as.data.frame(t(sapply(accNumsHave, function(x) as.character(paste(x, c(1:4), sep = "__")))))
row.names(shouldHave) <- NULL
shouldHave1 <- c(as.character(shouldHave[, 1]),
as.character(shouldHave[, 2]),
as.character(shouldHave[, 3]),
as.character(shouldHave[, 4]))
# find missing ones -- this is the ones that
# are in the daily datasheet that aren't in the fruit measurements
missingTrts <- setdiff(shouldHave1,unique(kfrt$plantNum[kfrt$trt != "5"]))
# this should be 0 -- the ones from the fruit measurements that aren't in the daily datasheet
setdiff(unique(kfrt$plantNum[kfrt$trt != "Unbagged_2"]), shouldHave1)
## character(0)
# here's the list of plants that had their bags/tags go missing during the experiment (meaning that
# I was unable to collect fruits)
# note: "677_66_MASS__1" was the only plant that was missing a tag during the final collection
# that wasn't missing in one of my previous checks.
NAList <- c("1129_74_E__4", "1129_74_C__4", "39_60_A__3", "685_70_A__2", "677_66_MASS__1")
# note: this ignores trt#5 which is the same as #3
ZeroFruitPlants <- missingTrts[!(missingTrts %in% NAList)]
zfdf <- data.frame(Var1 = ZeroFruitPlants,
Freq = 0,
trt = sapply(X = 1:length(ZeroFruitPlants),
FUN = function(x) strsplit(as.character(ZeroFruitPlants[x]),
split = "__")[[1]][2]),
accessNum = sapply(X = 1:length(ZeroFruitPlants),
FUN = function(x) strsplit(as.character(ZeroFruitPlants[x]),
split = "__")[[1]][1])
)
zfdf$trt <- mapvalues(zfdf$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged",
"Unbagged & Outcrossed", "Unbagged_2"))
## The following `from` values were not present in `x`: 3, 4, 5
countFin <- rbind(counts, zfdf)
# final fruit counts for the fruits collected at the end of the experiment
countFin <- countFin[order(countFin$accessNum, countFin$trt, decreasing = FALSE), ]
# change labels from unbagged to conntrol
countFin$trt <- mapvalues(countFin$trt, c("Bagged", "Bagged & Selfed", "Unbagged",
"Unbagged & Outcrossed", "Unbagged_2")
, c("Bagged", "Bagged & Selfed", "Control",
"Unbagged & Outcrossed", "Control_2"))
# change levels, so that control is first
countFin$trt <- factor(countFin$trt, levels = c("Control","Control_2", "Bagged", "Bagged & Selfed",
"Unbagged & Outcrossed"))
plantInds <- 1:nrow(countFin) %in% grep(pattern = "1129", x = countFin$accessNum)
countFin$plantLineage <- sapply(1:length(plantInds), FUN = function(x){
ifelse(plantInds[x], "673_69", paste(strsplit(countFin$accessNum[x], split = "_")[[1]][1:2], collapse = "_") )})
Visualize counts of fruits
ggplot(countFin, aes(x = trt , y = Freq, fill = trt)) +
geom_boxplot() +
# geom_violin()+
labs(y = "Number of fruits", x = "Treatment") +
scale_fill_brewer(name = "Treatment", palette = "Set1")

saveDir <- "/Users/callinswitzer/Dropbox/ExperSummer2016/Kalmia/Manuscript/Media/"
ggsave(paste0(saveDir, "KalmiaFruitNumber.pdf"), width = 10, height = 8)
Visualize counts of fruits (ignore treatment #5)
ggplot(countFin[!(countFin$trt %in% 'Control_2'), ], aes(x = trt , y = Freq, fill = trt)) +
geom_boxplot() +
# geom_point(aes(color = plantLineage)) +
labs(y = "Number of fruits", x = "Treatment") +
theme(axis.text.x = element_text(angle = 35, hjust = 1),
legend.position = 'none') +
scale_fill_brewer(name = "Treatment", palette = "Set1")

ggsave(paste0(saveDir, "KalmiaFruitNumber_trt1_4Only.pdf"), width = 5, height = 4)
Use GLMM’s to find differences in number of fruits
First, summarize the dataset that I will be using
# I'm ignoring control#2, because it was originally intended for another experiment (analysis wasn't planned)
cf1 <- countFin[countFin$trt != "Control_2",]
nrow(countFin) # number of total counts, including Control2
## [1] 104
sum(cf1$Freq) # total number of fruits in the analysis of count
## [1] 1035
# Number of counts, excluding control_2
# this is different from the number of photos
# because I didn't take photos on 0-count plants
nrow(cf1)
## [1] 84
data.frame(table(cf1$trt)) # number of plants in each treatment that we are analyzing
## Var1 Freq
## 1 Control 21
## 2 Control_2 0
## 3 Bagged 22
## 4 Bagged & Selfed 21
## 5 Unbagged & Outcrossed 20
data.frame(table(cf1$accessNum)) # shows number of counts per plant -- 4 means that we counted all four treatments
## Var1 Freq
## 1 1129_74_A 4
## 2 1129_74_B 4
## 3 1129_74_C 3
## 4 1129_74_E 3
## 5 1129_74_F 4
## 6 12_2006_A 4
## 7 128_96_MASS 4
## 8 132_98_MASS 4
## 9 150_58_A 4
## 10 232_46_A 4
## 11 35_40_C 4
## 12 39_60_A 3
## 13 425_74_D 4
## 14 440_66_A 4
## 15 46_18_A 4
## 16 572_64_MASS 4
## 17 624_64_B 4
## 18 667_66_MASS 4
## 19 673_69_B 4
## 20 685_70_A 3
## 21 721_79_B 4
## 22 UNLABELED_1 4
# shows which treatments / plants are missing from analysis
# this is different from the plants that just had 0 counts
data.frame(table(interaction(cf1$accessNum , cf1$trt)))
## Var1 Freq
## 1 1129_74_A.Control 1
## 2 1129_74_B.Control 1
## 3 1129_74_C.Control 1
## 4 1129_74_E.Control 1
## 5 1129_74_F.Control 1
## 6 12_2006_A.Control 1
## 7 128_96_MASS.Control 1
## 8 132_98_MASS.Control 1
## 9 150_58_A.Control 1
## 10 232_46_A.Control 1
## 11 35_40_C.Control 1
## 12 39_60_A.Control 0
## 13 425_74_D.Control 1
## 14 440_66_A.Control 1
## 15 46_18_A.Control 1
## 16 572_64_MASS.Control 1
## 17 624_64_B.Control 1
## 18 667_66_MASS.Control 1
## 19 673_69_B.Control 1
## 20 685_70_A.Control 1
## 21 721_79_B.Control 1
## 22 UNLABELED_1.Control 1
## 23 1129_74_A.Control_2 0
## 24 1129_74_B.Control_2 0
## 25 1129_74_C.Control_2 0
## 26 1129_74_E.Control_2 0
## 27 1129_74_F.Control_2 0
## 28 12_2006_A.Control_2 0
## 29 128_96_MASS.Control_2 0
## 30 132_98_MASS.Control_2 0
## 31 150_58_A.Control_2 0
## 32 232_46_A.Control_2 0
## 33 35_40_C.Control_2 0
## 34 39_60_A.Control_2 0
## 35 425_74_D.Control_2 0
## 36 440_66_A.Control_2 0
## 37 46_18_A.Control_2 0
## 38 572_64_MASS.Control_2 0
## 39 624_64_B.Control_2 0
## 40 667_66_MASS.Control_2 0
## 41 673_69_B.Control_2 0
## 42 685_70_A.Control_2 0
## 43 721_79_B.Control_2 0
## 44 UNLABELED_1.Control_2 0
## 45 1129_74_A.Bagged 1
## 46 1129_74_B.Bagged 1
## 47 1129_74_C.Bagged 1
## 48 1129_74_E.Bagged 1
## 49 1129_74_F.Bagged 1
## 50 12_2006_A.Bagged 1
## 51 128_96_MASS.Bagged 1
## 52 132_98_MASS.Bagged 1
## 53 150_58_A.Bagged 1
## 54 232_46_A.Bagged 1
## 55 35_40_C.Bagged 1
## 56 39_60_A.Bagged 1
## 57 425_74_D.Bagged 1
## 58 440_66_A.Bagged 1
## 59 46_18_A.Bagged 1
## 60 572_64_MASS.Bagged 1
## 61 624_64_B.Bagged 1
## 62 667_66_MASS.Bagged 1
## 63 673_69_B.Bagged 1
## 64 685_70_A.Bagged 1
## 65 721_79_B.Bagged 1
## 66 UNLABELED_1.Bagged 1
## 67 1129_74_A.Bagged & Selfed 1
## 68 1129_74_B.Bagged & Selfed 1
## 69 1129_74_C.Bagged & Selfed 1
## 70 1129_74_E.Bagged & Selfed 1
## 71 1129_74_F.Bagged & Selfed 1
## 72 12_2006_A.Bagged & Selfed 1
## 73 128_96_MASS.Bagged & Selfed 1
## 74 132_98_MASS.Bagged & Selfed 1
## 75 150_58_A.Bagged & Selfed 1
## 76 232_46_A.Bagged & Selfed 1
## 77 35_40_C.Bagged & Selfed 1
## 78 39_60_A.Bagged & Selfed 1
## 79 425_74_D.Bagged & Selfed 1
## 80 440_66_A.Bagged & Selfed 1
## 81 46_18_A.Bagged & Selfed 1
## 82 572_64_MASS.Bagged & Selfed 1
## 83 624_64_B.Bagged & Selfed 1
## 84 667_66_MASS.Bagged & Selfed 1
## 85 673_69_B.Bagged & Selfed 1
## 86 685_70_A.Bagged & Selfed 0
## 87 721_79_B.Bagged & Selfed 1
## 88 UNLABELED_1.Bagged & Selfed 1
## 89 1129_74_A.Unbagged & Outcrossed 1
## 90 1129_74_B.Unbagged & Outcrossed 1
## 91 1129_74_C.Unbagged & Outcrossed 0
## 92 1129_74_E.Unbagged & Outcrossed 0
## 93 1129_74_F.Unbagged & Outcrossed 1
## 94 12_2006_A.Unbagged & Outcrossed 1
## 95 128_96_MASS.Unbagged & Outcrossed 1
## 96 132_98_MASS.Unbagged & Outcrossed 1
## 97 150_58_A.Unbagged & Outcrossed 1
## 98 232_46_A.Unbagged & Outcrossed 1
## 99 35_40_C.Unbagged & Outcrossed 1
## 100 39_60_A.Unbagged & Outcrossed 1
## 101 425_74_D.Unbagged & Outcrossed 1
## 102 440_66_A.Unbagged & Outcrossed 1
## 103 46_18_A.Unbagged & Outcrossed 1
## 104 572_64_MASS.Unbagged & Outcrossed 1
## 105 624_64_B.Unbagged & Outcrossed 1
## 106 667_66_MASS.Unbagged & Outcrossed 1
## 107 673_69_B.Unbagged & Outcrossed 1
## 108 685_70_A.Unbagged & Outcrossed 1
## 109 721_79_B.Unbagged & Outcrossed 1
## 110 UNLABELED_1.Unbagged & Outcrossed 1
# create "average plant" for the plants that are cuttings of 673_64
avgClf <- tapply(X = cf1$Freq, INDEX = interaction(cf1$plantLineage, droplevels(cf1$trt)), mean, na.rm = TRUE)
cts <- tapply(X = cf1$Freq, INDEX = interaction(cf1$plantLineage, droplevels(cf1$trt)), length)
trts <- do.call(rbind, strsplit(rownames(avgClf), split = "\\."))
avgDF <- data.frame(cbind( avgClf, trts, cts))
colnames(avgDF) <- c("avgFreq", "plantLineage", "trt", "count")
avgDF$avgFreq <- round(as.numeric(as.character(avgDF$avgFreq)))
avgDF <- avgDF[order(avgDF$plantLineage, avgDF$trt), ]
avgDF$trt <- relevel(as.factor(avgDF$trt), ref = "Control")
avgDF <- na.omit(avgDF)
cf1 <- cf1[order(cf1$plantLineage, as.character(cf1$trt)), ]
acn <- character()
for(ii in 1:nrow(avgDF)){
plLin <- as.character(avgDF$plantLineage[ii])
acn[ii] = cf1$accessNum[grep(plLin, cf1$accessNum)[1]]
}
avgDF$accessNum <- acn
avgDF$avgFreq
## [1] 2 5 3 12 9 12 8 35 0 0 6 11 13 7 11 8 0 4 1 8 8 27 18
## [24] 17 6 16 15 14 0 18 62 1 19 15 18 2 0 3 14 1 0 4 26 0 5 17
## [47] 21 0 34 13 22 6 25 17 32 0 9 14 0 0 1 9 1 4 6 7
ggplot(avgDF, aes(x = trt , y = avgFreq, fill = trt)) +
geom_boxplot() +
geom_point(aes(color = plantLineage == "673_69"))

ggplot(avgDF, aes(x = trt , y = avgFreq, fill = trt)) +
geom_boxplot() +
geom_point(aes(color = plantLineage == "673_69"), position = position_jitter(width = 0.1, height = 0)) +
labs(y = "Number of fruits", x = "Treatment") +
ylim(c(0, 70)) +
theme(axis.text.x = element_text(angle = 35, hjust = 1),
legend.position = 'none') +
scale_fill_brewer(name = "Treatment", palette = "Set1")

ggplot(countFin[!(countFin$trt %in% 'Control_2'), ], aes(x = trt , y = Freq, fill = trt)) +
geom_boxplot() +
geom_point(aes(color = plantLineage == "673_69"), position = position_jitter(width = 0.1, height = 0)) +
ylim(c(0, 70)) +
labs(y = "Number of fruits", x = "Treatment") +
theme(axis.text.x = element_text(angle = 35, hjust = 1),
legend.position = 'none') +
scale_fill_brewer(name = "Treatment", palette = "Set1")

nrow(avgDF)
## [1] 66
avgDF$count
## 12_2006.Bagged 12_2006.Bagged & Selfed
## 1 1
## 12_2006.Control 12_2006.Unbagged & Outcrossed
## 1 1
## 128_96.Bagged 128_96.Bagged & Selfed
## 1 1
## 128_96.Control 128_96.Unbagged & Outcrossed
## 1 1
## 132_98.Bagged 132_98.Bagged & Selfed
## 1 1
## 132_98.Control 132_98.Unbagged & Outcrossed
## 1 1
## 150_58.Bagged 150_58.Bagged & Selfed
## 1 1
## 150_58.Control 150_58.Unbagged & Outcrossed
## 1 1
## 232_46.Bagged 232_46.Bagged & Selfed
## 1 1
## 232_46.Control 232_46.Unbagged & Outcrossed
## 1 1
## 35_40.Bagged 35_40.Bagged & Selfed
## 1 1
## 35_40.Control 35_40.Unbagged & Outcrossed
## 1 1
## 39_60.Bagged 39_60.Bagged & Selfed
## 1 1
## 39_60.Unbagged & Outcrossed 425_74.Bagged
## 1 1
## 425_74.Bagged & Selfed 425_74.Control
## 1 1
## 425_74.Unbagged & Outcrossed 440_66.Bagged
## 1 1
## 440_66.Bagged & Selfed 440_66.Control
## 1 1
## 440_66.Unbagged & Outcrossed 46_18.Bagged
## 1 1
## 46_18.Bagged & Selfed 46_18.Control
## 1 1
## 46_18.Unbagged & Outcrossed 572_64.Bagged
## 1 1
## 572_64.Bagged & Selfed 572_64.Control
## 1 1
## 572_64.Unbagged & Outcrossed 624_64.Bagged
## 1 1
## 624_64.Bagged & Selfed 624_64.Control
## 1 1
## 624_64.Unbagged & Outcrossed 667_66.Bagged
## 1 1
## 667_66.Bagged & Selfed 667_66.Control
## 1 1
## 667_66.Unbagged & Outcrossed 673_69.Bagged
## 1 6
## 673_69.Bagged & Selfed 673_69.Control
## 6 6
## 673_69.Unbagged & Outcrossed 685_70.Bagged
## 4 1
## 685_70.Control 685_70.Unbagged & Outcrossed
## 1 1
## 721_79.Bagged 721_79.Bagged & Selfed
## 1 1
## 721_79.Control 721_79.Unbagged & Outcrossed
## 1 1
## UNLABELED_1.Bagged UNLABELED_1.Bagged & Selfed
## 1 1
## UNLABELED_1.Control UNLABELED_1.Unbagged & Outcrossed
## 1 1
## Levels: 1 4 6
m1 <- glmer(avgFreq ~ trt + (1|accessNum), family = poisson, data = avgDF)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: avgFreq ~ trt + (1 | accessNum)
## Data: avgDF
##
## AIC BIC logLik deviance df.resid
## 532.8 543.7 -261.4 522.8 61
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.5826 -1.2609 -0.2704 0.8947 5.2137
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.3939 0.6276
## Number of obs: 66, groups: accessNum, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.06227 0.17464 11.808 < 2e-16 ***
## trtBagged -0.93468 0.14975 -6.242 4.33e-10 ***
## trtBagged & Selfed 0.02795 0.11430 0.245 0.807
## trtUnbagged & Outcrossed 0.72430 0.09851 7.353 1.95e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.256
## trtBggd&Slf -0.334 0.393
## trtUnbggd&O -0.389 0.455 0.597
avgPreds <- predict(m1)
length(avgPreds)
## [1] 66
Create a model
m1 <- glmer(Freq ~ trt + I(plantLineage == "673_69") + (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Freq ~ trt + I(plantLineage == "673_69") + (1 | accessNum)
## Data: cf1
##
## AIC BIC logLik deviance df.resid
## 705.7 720.3 -346.8 693.7 78
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.827 -1.299 -0.306 1.134 5.617
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.3218 0.5673
## Number of obs: 84, groups: accessNum, 22
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.96660 0.16118 12.201 < 2e-16 ***
## trtBagged -0.97014 0.12276 -7.903 2.73e-15 ***
## trtBagged & Selfed 0.18833 0.08836 2.131 0.03306 *
## trtUnbagged & Outcrossed 0.75376 0.08360 9.016 < 2e-16 ***
## I(plantLineage == "673_69")TRUE 0.81679 0.28097 2.907 0.00365 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S trtU&O
## trtBagged -0.219
## trtBggd&Slf -0.301 0.396
## trtUnbggd&O -0.348 0.420 0.583
## I(L=="673_6 -0.501 0.002 0.000 0.032
factorPreds <- predict(m1, newdata = avgDF)
length(factorPreds)
## [1] 66
plot(avgPreds, factorPreds)
abline(0,1)

m1 <- glmer(Freq ~ trt + (1|plantLineage) + (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: cf1
##
## AIC BIC logLik deviance df.resid
## 709.4 724.0 -348.7 697.4 78
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8555 -1.3244 -0.3544 1.1650 5.5802
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.2085 0.4566
## plantLineage (Intercept) 0.2278 0.4773
## Number of obs: 84, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.04258 0.17623 11.590 < 2e-16 ***
## trtBagged -0.97065 0.12274 -7.908 2.62e-15 ***
## trtBagged & Selfed 0.18796 0.08836 2.127 0.0334 *
## trtUnbagged & Outcrossed 0.74998 0.08352 8.980 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.200
## trtBggd&Slf -0.275 0.396
## trtUnbggd&O -0.312 0.421 0.583
m1_1 <- glmer(Freq ~ trt + (1|plantLineage), family = poisson, data = cf1)
anova(m1, m1_1, test = "LRT") # keep accessNum
## Data: cf1
## Models:
## m1_1: Freq ~ trt + (1 | plantLineage)
## m1: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1_1 5 768.19 780.35 -379.10 758.19
## m1 6 709.42 724.01 -348.71 697.42 60.773 1 6.404e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: cf1
##
## AIC BIC logLik deviance df.resid
## 709.4 724.0 -348.7 697.4 78
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8555 -1.3244 -0.3544 1.1650 5.5802
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.2085 0.4566
## plantLineage (Intercept) 0.2278 0.4773
## Number of obs: 84, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.04258 0.17623 11.590 < 2e-16 ***
## trtBagged -0.97065 0.12274 -7.908 2.62e-15 ***
## trtBagged & Selfed 0.18796 0.08836 2.127 0.0334 *
## trtUnbagged & Outcrossed 0.74998 0.08352 8.980 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.200
## trtBggd&Slf -0.275 0.396
## trtUnbggd&O -0.312 0.421 0.583
merPreds <- predict(m1, newdata = avgDF)
plot(factorPreds, merPreds)
abline(0,1)

m1 <- glmer(Freq ~ trt + (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Freq ~ trt + (1 | accessNum)
## Data: cf1
##
## AIC BIC logLik deviance df.resid
## 710.9 723.0 -350.4 700.9 79
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8677 -1.3540 -0.3762 1.1541 5.5431
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.4585 0.6771
## Number of obs: 84, groups: accessNum, 22
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.18977 0.16026 13.664 < 2e-16 ***
## trtBagged -0.97104 0.12274 -7.911 2.55e-15 ***
## trtBagged & Selfed 0.18792 0.08836 2.127 0.0335 *
## trtUnbagged & Outcrossed 0.74877 0.08357 8.960 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.219
## trtBggd&Slf -0.302 0.396
## trtUnbggd&O -0.335 0.421 0.583
nonIndPreds <- predict(m1, newdata = avgDF)
plot(nonIndPreds, merPreds)
abline(0,1)

# calculate LRT for trt
m2 <- update(m1, .~. - trt)
anova(m1, m2) # highly significant
## Data: cf1
## Models:
## m2: Freq ~ (1 | accessNum)
## m1: Freq ~ trt + (1 | accessNum)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 2 1000.39 1005.25 -498.19 996.39
## m1 5 710.86 723.01 -350.43 700.86 295.53 3 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model diagnostics
m1 <- glmer(Freq ~ trt + (1|plantLineage) + (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: cf1
##
## AIC BIC logLik deviance df.resid
## 709.4 724.0 -348.7 697.4 78
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8555 -1.3244 -0.3544 1.1650 5.5802
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.2085 0.4566
## plantLineage (Intercept) 0.2278 0.4773
## Number of obs: 84, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.04258 0.17623 11.590 < 2e-16 ***
## trtBagged -0.97065 0.12274 -7.908 2.62e-15 ***
## trtBagged & Selfed 0.18796 0.08836 2.127 0.0334 *
## trtUnbagged & Outcrossed 0.74998 0.08352 8.980 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.200
## trtBggd&Slf -0.275 0.396
## trtUnbggd&O -0.312 0.421 0.583
# diagnostics
# qq plot
qqnorm(resid(m1), main = "")
qqline(resid(m1)) # outliers

# residual plot
plot(fitted(m1), resid(m1), xlab = "fitted", ylab = "residuals")
abline(0,0)

# possibly outliers
# QQPlot for group-level effects
qqnorm(ranef(m1)$accessNum[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1)$accessNum[[1]])

# # QQPlot for group-level effects
qqnorm(ranef(m1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1)$plantLineage[[1]])

infl <- influence(m1, obs = TRUE) # takes a while to calculate
# x11()
plot(infl, which = 'cook') # some influential points

# visualize model:
sjp.lmer(m1, type = 'fe')

sjp.lmer(m1, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...
## Plotting random effects...


#check assumptions of model
overdisp_fun <- function(model) {
## number of variance parameters in
## an n-by-n variance-covariance matrix
vpars <- function(m) {
nrow(m)*(nrow(m)+1)/2
}
model.df <- sum(sapply(VarCorr(model),vpars))+length(fixef(model))
rdf <- nrow(model.frame(model))-model.df
rp <- residuals(model,type="pearson")
Pearson.chisq <- sum(rp^2)
prat <- Pearson.chisq/rdf
pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}
overdisp_fun(m1) # shows overdispersion
## chisq ratio rdf p
## 2.727300e+02 3.496539e+00 7.800000e+01 2.078860e-23
# here's another way to check for overdispersion
residDev <- sum(residuals(m1, type = 'deviance')^2) # calculate residual deviance
# this ratio should be about 1 -- larger than 1 suggests overdispersion
residDev / df.residual(m1)
## [1] 4.072153
Use negative binomial model, since the poisson model is overdispersed
m1.1 <- glmer.nb(Freq ~ trt +(1|plantLineage) + (1|accessNum), data = cf1)
summary(m1.1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Negative Binomial(2.1736) ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: cf1
##
## AIC BIC logLik deviance df.resid
## 571.9 588.9 -279.0 557.9 77
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3912 -0.5687 -0.1032 0.4856 2.6633
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.07951 0.2820
## plantLineage (Intercept) 0.33723 0.5807
## Number of obs: 84, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.06409 0.23661 8.723 < 2e-16 ***
## trtBagged -1.06039 0.25396 -4.175 2.98e-05 ***
## trtBagged & Selfed 0.04771 0.24127 0.198 0.843251
## trtUnbagged & Outcrossed 0.78092 0.23708 3.294 0.000988 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.465
## trtBggd&Slf -0.487 0.479
## trtUnbggd&O -0.533 0.457 0.469
m1.2 <- update(m1.1, .~. - (1|accessNum))
anova(m1.1, m1.2, test = "LRT")
## Data: cf1
## Models:
## m1.2: Freq ~ trt + (1 | plantLineage)
## m1.1: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1.2 6 570.42 585.00 -279.21 558.42
## m1.1 7 571.92 588.93 -278.96 557.92 0.5036 1 0.4779
summary(m1.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Negative Binomial(2.1736) ( log )
## Formula: Freq ~ trt + (1 | plantLineage)
## Data: cf1
##
## AIC BIC logLik deviance df.resid
## 570.4 585.0 -279.2 558.4 78
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3923 -0.5796 -0.1868 0.4738 2.6743
##
## Random effects:
## Groups Name Variance Std.Dev.
## plantLineage (Intercept) 0.4044 0.636
## Number of obs: 84, groups: plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.04901 0.23387 8.762 < 2e-16 ***
## trtBagged -1.04929 0.25272 -4.152 3.29e-05 ***
## trtBagged & Selfed 0.07543 0.23753 0.318 0.750825
## trtUnbagged & Outcrossed 0.78304 0.23636 3.313 0.000923 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.465
## trtBggd&Slf -0.480 0.476
## trtUnbggd&O -0.538 0.461 0.475
# m1.1 <- glmer.nb(Freq ~ trt + I(plantLineage == "673_69") + (1|accessNum), data = cf1)
# summary(m1.1)
# the model below will not converge
# m1.1a <- glmer.nb(Freq ~ trt + I(plantLineage == "673_69") + trt: I(plantLineage == "673_69") + (1|accessNum), data = cf1, control=glmerControl(optimizer="bobyqa"))
# summary(m1.1a)
# m1.1 <- glmer.nb(avgFreq ~ trt +(1|accessNum), data = avgDF)
# summary(m1.1)
# get overall p-value for treatment
m2.1 <- update(m1.1, .~. - trt)
anova(m1.1, m2.1, test = 'LRT')
## Data: cf1
## Models:
## m2.1: Freq ~ (1 | plantLineage) + (1 | accessNum)
## m1.1: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2.1 4 618.65 628.37 -305.32 610.65
## m1.1 7 571.92 588.93 -278.96 557.92 52.73 3 2.093e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Negative binomial model diagnostics
overdisp_fun(m1.2)
## chisq ratio rdf p
## 64.4071127 0.8152799 79.0000000 0.8824556
# here's another way to check for overdispersion
residDev <- sum(residuals(m1.2, type = 'deviance')^2) # calculate residual deviance
# this ratio should be about 1 -- larger than 1 suggests overdispersion
residDev / df.residual(m1.2)
## [1] 1.196974
# diagnostics
# qq plot
qqnorm(resid(m1.2), main = "")
qqline(resid(m1.2)) # a little better

# residual plot
plot(fitted(m1.2), resid(m1.2), xlab = "fitted", ylab = "residuals")
abline(0,0) # residuals look better for this model

# QQPlot for group-level effects
# qqnorm(ranef(m1.2)$accessNum[[1]], main="Normal Q-Q plot for random effects")
# qqline(ranef(m1.2)$accessNum[[1]])
qqnorm(ranef(m1.2)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1.2)$plantLineage[[1]])

infl <- influence(m1.1, obs = TRUE) # takes a while to calculate
plot(infl, which = 'cook') # nothing above 1

# visualize model:
sjp.lmer(m1.2, type = 'fe')

sjp.lmer(m1.2, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...

# get estimated dispersion parameter for NB Model
getME(m1.1, "glmer.nb.theta") # 2.17
## [1] 2.17355
# post-hoc pairwise comparisons with adjusted p-values
# from documentation:
# test = adjusted() multiple test procedures as specified by the type argument
# to adjusted: "single-step" denotes adjusted p values as computed
# from the joint normal or t distribution of the z statistics (default),
summary(glht(m1.2, lsm(pairwise ~ trt)), test=adjusted("single-step")) # pairwise tests for fruit counts
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: glmer(formula = Freq ~ trt + (1 | plantLineage), data = cf1,
## family = negative.binomial(theta = 2.17355047077783))
##
## Linear Hypotheses:
## Estimate Std. Error z value
## Control - Bagged == 0 1.04929 0.25272 4.152
## Control - Bagged & Selfed == 0 -0.07543 0.23753 -0.318
## Control - Unbagged & Outcrossed == 0 -0.78304 0.23636 -3.313
## Bagged - Bagged & Selfed == 0 -1.12472 0.25133 -4.475
## Bagged - Unbagged & Outcrossed == 0 -1.83233 0.25419 -7.208
## Bagged & Selfed - Unbagged & Outcrossed == 0 -0.70761 0.24269 -2.916
## Pr(>|z|)
## Control - Bagged == 0 < 0.001 ***
## Control - Bagged & Selfed == 0 0.98891
## Control - Unbagged & Outcrossed == 0 0.00529 **
## Bagged - Bagged & Selfed == 0 < 0.001 ***
## Bagged - Unbagged & Outcrossed == 0 < 0.001 ***
## Bagged & Selfed - Unbagged & Outcrossed == 0 0.01840 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
Visualize annotated plot for fruit counts
count_pub <- within(countFin, {
trt <- mapvalues(trt, from = c("Control","Control_2", "Bagged", "Bagged & Selfed",
"Unbagged & Outcrossed"),
to = c("Control","Control_2", "Bagged", "Bagged\n & Selfed",
"Outcrossed"))
})
count_pub <- droplevels(count_pub)
ggplot(count_pub[!(count_pub$trt %in% 'Control_2'), ], aes(x = trt , y = Freq+ 0.1)) +
geom_boxplot(width = 0.2, fill = 'grey80') +
labs(y = "Number of fruits", x = "Treatment") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) + 10, label=c("a", "b", "a", "c"),
color="black") +
scale_y_log10(breaks = c(1, 10, 100), limits = c(0.1, 100))

ggsave(paste0(saveDir, "KalmiaFruitNumber_logScale.pdf"), width = 3, height = 4)
Visualize CI’s for each of the groups
# refit m1.1 with treatments in alphabetical order (b/c order isn't preserved in model matrix)
cf1.1 <- within(cf1, {
trt = as.factor(as.character(cf1$trt))
})
# refit model with different reference levels
m1.2 <- glmer.nb(Freq ~ trt + (1 | plantLineage) + (1 | accessNum), data = cf1.1, glmerControl(optimizer="bobyqa", optCtrl = list(maxfun = 100000)))
summary(m1.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Negative Binomial(2.1735) ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: cf1.1
##
## AIC BIC logLik deviance df.resid
## 571.9 588.9 -279.0 557.9 77
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3912 -0.5687 -0.1032 0.4856 2.6633
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.07951 0.2820
## plantLineage (Intercept) 0.33722 0.5807
## Number of obs: 84, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0037 0.2542 3.949 7.84e-05 ***
## trtBagged & Selfed 1.1081 0.2531 4.379 1.19e-05 ***
## trtControl 1.0604 0.2540 4.175 2.97e-05 ***
## trtUnbagged & Outcrossed 1.8413 0.2563 7.183 6.81e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtB&S trtCnt
## trtBggd&Slf -0.544
## trtControl -0.566 0.547
## trtUnbggd&O -0.598 0.532 0.568
pframe <- data.frame(trt = levels(droplevels(cf1.1$trt)))
pframe$Freq <- 0
pp <- predict(m1.2, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0
### Calculate CI's (using bootstrap, not accounting for random effects)
mm <- model.matrix(terms(m1.2), pframe)
predFun<-function(.) exp(mm%*%fixef(.) )
bb<-bootMer(m1.2,FUN=predFun,nsim=200) #do this 200 times
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00474257 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
#as we did this 200 times the 95% CI will be bordered by the 5th and 195th value
bb_se<-apply(bb$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb_se[1,]
pframe$bhi<-bb_se[2,]
pframe$predMean <- pp
pframe$median = tapply(cf1.1$Freq, INDEX = cf1.1$trt, median)
pframe$trt <- mapvalues(pframe$trt, from = c("Control", "Bagged", "Bagged & Selfed",
"Unbagged & Outcrossed"),
to = c("Control", "Bagged", "Bagged\n & Selfed",
"Outcrossed"))
pframe$trt <- relevel(pframe$trt, ref = "Control")
#plot 95% confidence intervals
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
number_ticks <- function(n) {function(limits) pretty(limits, n)}
g0 <- ggplot(pframe, aes(x=trt, y=predMean))+
geom_point()+
labs(y = "Number of fruits", x = "Treatment") +
geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
scale_y_log10(limits =c(1,60), breaks = number_ticks(6)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) - 10, label=c("a", "b", "a", "c"),
color="black")
g0

ggsave(paste0(saveDir, "KalmiaFruitNumber_BSCI_logScale.pdf"), width = 3, height = 4)
## Bootstrap CI on linear scale -- not that great!
g1 <- ggplot(pframe, aes(x=trt, y=predMean))+
geom_point()+
labs(y = "Number of Fruits", x = "Treatment") +
geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) - 40, label=c("a", "b", "a", "c"),
color="black")
g1

ggsave(paste0(saveDir, "KalmiaFruitNumber_BSCI_LinearScale.pdf"), width = 3, height = 4)
cp <- droplevels(count_pub[count_pub$trt != 'Control_2',])
# this might actually be the best way
ggplot(cp, aes(x = trt , y = Freq)) +
geom_boxplot(width = 0.2, fill = 'grey80') +
labs(y = "Number of fruits", x = "Treatment") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) + 1, label=c("a", "b", "a", "c"),
color="black")

ggsave(paste0(saveDir, "KalmiaFruitNumber_LinearScale.pdf"), width = 3, height = 4)
Visualize fruit size
# calculate fruit size by plant
sizeDF_mean <- as.data.frame(tapply(kfrt$dia_mm, INDEX = kfrt$plantNum, mean))
colnames(sizeDF_mean) = "meanFrtSz"
sizeDF_mean$trt = sapply(X = 1:length(sizeDF_mean$meanFrtSz),
FUN = function(x) strsplit(as.character(row.names(sizeDF_mean)[x]),
split = "__")[[1]][2])
sizeDF_mean$trt <- mapvalues(sizeDF_mean$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Control",
"Unbagged & Outcrossed", "Control_2"))
# reorder
sizeDF_mean$trt <- factor(sizeDF_mean$trt, levels = c("Control", "Control_2", "Bagged", "Bagged & Selfed",
"Unbagged & Outcrossed"))
sizeDF_mean$accessNum = sapply(X = 1:length(sizeDF_mean$meanFrtSz),
FUN = function(x) strsplit(as.character(row.names(sizeDF_mean)[x]),
split = "__")[[1]][1])
ggplot(sizeDF_mean, aes(x = trt, y = meanFrtSz, fill = trt)) +
geom_boxplot(alpha = 0.5) +
# stat_summary(fun.y=mean, geom="line", aes(group = accessNum, color = accessNum)) +
# stat_summary(fun.y=mean, geom="point", aes(group = accessNum, color = accessNum)) +
geom_point(aes(color = trt))

ggplot(sizeDF_mean, aes(x = trt, y = meanFrtSz, fill = trt)) +
geom_boxplot() +
labs(y = "Mean Fruit Diameter (mm)", x = "Treatment") +
scale_fill_brewer(name = "Treatment", palette = "Set1")

ggsave(paste0(saveDir, "KalmiaFruitDiameter.pdf"), width = 10, height = 8)
# visualize , but exclude treatment 5
ggplot(sizeDF_mean[!(sizeDF_mean$trt %in% 'Control_2'), ],
aes(x = trt, y = meanFrtSz, fill = trt)) +
geom_boxplot() +
labs(y = "Mean Fruit Diameter (mm)", x = "Treatment") +
scale_fill_brewer(name = "Treatment", palette = "Set1") +
theme(axis.text.x = element_text(angle = 35, hjust = 1),
legend.position = 'none')

ggsave(paste0(saveDir, "KalmiaFruitDiameter_trt14Only.pdf"), width = 5, height = 4)
Model Fruit Size with LMER
size1 <- within(kfrt, {
trt <- mapvalues(trt, c("Bagged", "Bagged & Selfed", "Unbagged",
"Unbagged & Outcrossed", "Unbagged_2"),
c("Bagged", "Bagged\n & Selfed", "Control",
"Outcrossed", "Control_2"))
})
size1 <- droplevels(size1[size1$trt != "Control_2",])
# get sample sizes for size dataset
nrow(size1) # total number of fruits in analysis for size
## [1] 1035
data.frame(table(size1$accessNum)) # total number of fruits per plant
## Var1 Freq
## 1 1129_74_A 93
## 2 1129_74_B 36
## 3 1129_74_C 69
## 4 1129_74_E 43
## 5 1129_74_F 48
## 6 12_2006_A 22
## 7 128_96_MASS 64
## 8 132_98_MASS 17
## 9 150_58_A 39
## 10 232_46_A 13
## 11 35_40_C 70
## 12 39_60_A 37
## 13 425_74_D 94
## 14 440_66_A 53
## 15 46_18_A 19
## 16 572_64_MASS 31
## 17 624_64_B 43
## 18 667_66_MASS 69
## 19 673_69_B 124
## 20 685_70_A 23
## 21 721_79_B 10
## 22 UNLABELED_1 18
data.frame(table(size1$plantLineage)) # total number of fruits per plant lineage
## Var1 Freq
## 1 12_2006 22
## 2 128_96 64
## 3 132_98 17
## 4 150_58 39
## 5 232_46 13
## 6 35_40 70
## 7 39_60 37
## 8 425_74 94
## 9 440_66 53
## 10 46_18 19
## 11 572_64 31
## 12 624_64 43
## 13 667_66 69
## 14 673_69 413
## 15 685_70 23
## 16 721_79 10
## 17 UNLABELED_1 18
size1$trt <- relevel(as.factor(size1$trt), ref = "Control")
f1 <- lmer(dia_mm ~ trt + (1|plantLineage/accessNum), data = size1)
summary(f1) # final model for paper
## Linear mixed model fit by REML ['lmerMod']
## Formula: dia_mm ~ trt + (1 | plantLineage/accessNum)
## Data: size1
##
## REML criterion at convergence: 1852.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8939 -0.6307 0.0377 0.6695 3.4682
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum:plantLineage (Intercept) 0.04439 0.2107
## plantLineage (Intercept) 0.13357 0.3655
## Residual 0.32638 0.5713
## Number of obs: 1035, groups: accessNum:plantLineage, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.23422 0.11027 38.40
## trtBagged -0.51953 0.07274 -7.14
## trtBagged\n & Selfed 0.01439 0.05304 0.27
## trtOutcrossed 0.73741 0.04881 15.11
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.179
## trtBggd&Slf -0.235 0.387
## trtOutcrssd -0.296 0.404 0.537
f1 <- lmer(dia_mm ~ trt + (1|plantLineage) + (1|accessNum), data = size1)
summary(f1) # final model for paper (same as above)
## Linear mixed model fit by REML ['lmerMod']
## Formula: dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: size1
##
## REML criterion at convergence: 1852.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8939 -0.6307 0.0377 0.6695 3.4682
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.04439 0.2107
## plantLineage (Intercept) 0.13357 0.3655
## Residual 0.32638 0.5713
## Number of obs: 1035, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.23422 0.11027 38.40
## trtBagged -0.51953 0.07274 -7.14
## trtBagged\n & Selfed 0.01439 0.05304 0.27
## trtOutcrossed 0.73741 0.04881 15.11
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.179
## trtBggd&Slf -0.235 0.387
## trtOutcrssd -0.296 0.404 0.537
# get p-value for trt
f2 <- update(f1, .~. - trt)
anova(f1, f2, "LRT")
## refitting model(s) with ML (instead of REML)
## Data: size1
## Models:
## f2: dia_mm ~ (1 | plantLineage) + (1 | accessNum)
## f1: dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## f2 4 2234.7 2254.5 -1113.36 2226.7
## f1 7 1851.4 1886.0 -918.71 1837.4 389.3 3 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Fruit Size Diagnostics
# diagnostics
# qq plot
qqnorm(resid(f1), main = "")
qqline(resid(f1)) # good

# residual plot
plot(fitted(f1), residuals(f1, type = "deviance"), xlab = "fitted", ylab = "residuals")
abline(0,0)

# QQPlot for group-level effects
qqnorm(ranef(f1)$accessNum[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(f1)$accessNum[[1]]) # one possible outlier

# QQPlot for group-level effects
qqnorm(ranef(f1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(f1)$plantLineage[[1]]) # looks good

infl <- influence(f1, obs = TRUE) # takes a while to calculate
plot(infl, which = 'cook') # nothing above 1

# visualize model:
sjp.lmer(f1, type = 'fe')
## Computing p-values via Kenward-Roger approximation. Use `p.kr = FALSE` if computation takes too long.

# Best Linear Unbiased Predictors (BLUPs)
sjp.lmer(f1, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...
## Plotting random effects...


# post-hoc pairwise comparisons with adjusted p-values
# from documentation:
# test = adjusted() multiple test procedures as specified by the type argument
# to adjusted: "single-step" denotes adjusted p values as computed
# from the joint normal or t distribution of the z statistics (default),
summary(glht(f1, lsm(pairwise ~ trt)), test=adjusted("single-step")) # pairwise tests for fruit counts
## Loading required namespace: lmerTest
## Note: df set to 1020
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: lmer(formula = dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum),
## data = size1)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## Control - Bagged == 0 0.51953 0.07274 7.142 <1e-06
## Control - Bagged\n & Selfed == 0 -0.01439 0.05304 -0.271 0.993
## Control - Outcrossed == 0 -0.73741 0.04881 -15.107 <1e-06
## Bagged - Bagged\n & Selfed == 0 -0.53392 0.07156 -7.461 <1e-06
## Bagged - Outcrossed == 0 -1.25694 0.06930 -18.138 <1e-06
## Bagged\n & Selfed - Outcrossed == 0 -0.72301 0.04916 -14.709 <1e-06
##
## Control - Bagged == 0 ***
## Control - Bagged\n & Selfed == 0
## Control - Outcrossed == 0 ***
## Bagged - Bagged\n & Selfed == 0 ***
## Bagged - Outcrossed == 0 ***
## Bagged\n & Selfed - Outcrossed == 0 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
Visualize Fruit Size with annotations
# refit m1.1 with treatments in alphabetical order (b/c order isn't preserved in model matrix)
sf1<- within(size1, {
trt = as.factor(as.character(size1$trt))
})
# refit model with different reference levels
f1 <- lmer(dia_mm ~ trt + (1|plantLineage) + (1|accessNum), data = sf1)
pframe <- data.frame(trt = levels(droplevels(sf1$trt)))
pframe$dia_mm = 0
pp <- predict(f1, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0
### Calculate CI's (using bootstrap, not accounting for random effects)
mm <- model.matrix(terms(f1), pframe)
predFun<-function(.) mm%*%fixef(.)
bb<-bootMer(f1,FUN=predFun,nsim=200) #do this 200 times
# get quantiles from bootstrap sample
bb_se<-apply(bb$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb_se[1,]
pframe$bhi<-bb_se[2,]
pframe$predMean <- pp
pframe # print frame, in case reporting in a table
## trt dia_mm blo bhi predMean
## 1 Bagged 0 3.445394 3.935725 3.714693
## 2 Bagged\n & Selfed 0 4.022982 4.461613 4.248615
## 3 Control 0 3.991122 4.469500 4.234223
## 4 Outcrossed 0 4.715504 5.185830 4.971629
pframe$trt <- relevel(pframe$trt, ref = "Control")
#plot 95% confidence intervals
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
g2 <- ggplot(pframe, aes(x=trt, y=predMean))+
geom_point()+
labs(y = "Fruit dia. (mm)", x = "Treatment") +
geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(5,5,5,5) + 0.7, label=c("a", "b", "a", "c"),
color="black")
g2

ggsave(paste0(saveDir, "KalmiaFruitDia_BSCI.pdf"), width = 3, height = 4)
## visualize raw data (mean fruit size per plant)
sdf1 <- droplevels(within(sizeDF_mean[!(sizeDF_mean$trt %in% 'Control_2'), ], {
trt <- mapvalues(trt, c("Bagged", "Bagged & Selfed", "Control",
"Unbagged & Outcrossed", "Control_2"),
c("Bagged", "Bagged\n & Selfed", "Control",
"Outcrossed", "Control_2"))
}))
# visualize fruit size
ggplot(sdf1, aes(x = trt, y = meanFrtSz)) +
geom_boxplot(width = 0.2, fill = 'grey80') +
labs(y = "Fruit dia. (mm)", x = "Treatment") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(5,5,5,5) + 2, label=c("a", "b", "a", "c"),
color="black")

ggsave(paste0(saveDir, "KalmiaFruitDia.pdf"), width = 3, height = 4)
# compare two plots
g2 + geom_boxplot(data = sdf1, aes(x = trt, y = meanFrtSz), width = 0.2, alpha = 0)

Visualize data to compare fruit size with number of seeds
# read in data
sizeSeed <- read.csv("KalmiaFruitSizeAndSeeds.csv")
ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) +
geom_point() +
stat_smooth(method = 'lm', formula = y ~ exp(x), se = F) +
labs(x = 'Fruit Diameter (mm)', y = 'Num Seeds in 1 Carpel')

ggsave(paste0(saveDir, "KalmiaFruitSeeds.pdf"), width = 5, height = 4)
# on log scale
ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) +
geom_point() +
stat_smooth(method = 'lm', formula = y ~ x, se = F) +
scale_y_continuous(trans="log") +
labs(y = "log(e) number of seeds")

# GLM
ss1 <- glm(NumSeeds ~ Dia..mm., family = poisson, data = sizeSeed)
summary(ss1)
##
## Call:
## glm(formula = NumSeeds ~ Dia..mm., family = poisson, data = sizeSeed)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.1484 -1.3123 0.0538 1.1869 3.4399
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.22136 0.32470 0.682 0.495
## Dia..mm. 0.63356 0.07019 9.027 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 156.083 on 18 degrees of freedom
## Residual deviance: 68.423 on 17 degrees of freedom
## AIC: 160.53
##
## Number of Fisher Scoring iterations: 4
ss1$deviance / ss1$df.residual # overdispersed
## [1] 4.024875
# Neg Binomial Model
ss2 <- glm.nb(NumSeeds ~ Dia..mm., data = sizeSeed)
summary(ss2) # final model for paper
##
## Call:
## glm.nb(formula = NumSeeds ~ Dia..mm., data = sizeSeed, init.theta = 7.72630546,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.03651 -0.83952 0.02891 0.59012 1.60618
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.2739 0.5660 0.484 0.628
## Dia..mm. 0.6215 0.1292 4.811 1.5e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(7.7263) family taken to be 1)
##
## Null deviance: 43.384 on 18 degrees of freedom
## Residual deviance: 18.964 on 17 degrees of freedom
## AIC: 135.72
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 7.73
## Std. Err.: 3.54
##
## 2 x log-likelihood: -129.72
# get overall test stat for dia
ss3 <-update(ss2, .~. - Dia..mm.)
anova(ss2, ss3) # overall p-value for Dia..mm.
## Likelihood ratio tests of Negative Binomial Models
##
## Response: NumSeeds
## Model theta Resid. df 2 x log-lik. Test df LR stat.
## 1 1 2.828473 18 -145.5067
## 2 Dia..mm. 7.726305 17 -129.7204 1 vs 2 1 15.78629
## Pr(Chi)
## 1
## 2 7.091441e-05
## visualize CI's for the two different models
newD <- data.frame(Dia..mm. = seq(2, 6, length.out = 100))
nbPreds = predict(ss2, newD, type = 'response', se.fit = TRUE)
nd1 <- data.frame(newD, nppred = nbPreds$fit, npse = nbPreds$se.fit)
predsPois <- predict(ss1, newD, type = 'response', se.fit = TRUE)
n2 <- data.frame(nd1, poisPred = predsPois$fit, poisse = predsPois$se.fit)
n2$npUpper <- with(n2, nppred + 2 * npse)
n2$npLower <- with(n2, nppred - 2 * npse)
n2$pUpper <- with(n2, poisPred + 2 * poisse)
n2$pLower <- with(n2, poisPred - 2 * poisse)
# compare NB errors vs. Pois errors
ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) +
geom_point() +
geom_line(data = n2, aes(x = Dia..mm., y = poisPred), color = 'blue') +
geom_line(data = n2, aes(x = Dia..mm., y = pUpper), color = 'blue', linetype = 2) +
geom_line(data = n2, aes(x = Dia..mm., y = pLower), color = 'blue', linetype = 2) +
labs(x = 'Fruit Diameter (mm)', y = 'Num Seeds in 1 Carpel') +
geom_line(data = n2, aes(x = Dia..mm., y = nppred), color = 'red') +
geom_line(data = n2, aes(x = Dia..mm., y = npUpper), color = 'red', linetype = 3) +
geom_line(data = n2, aes(x = Dia..mm., y = npLower), color = 'red', linetype = 3)

## model diagnostics for negative binomial model
plot(ss2, which = 4) #cook's distance

plot(y = residuals(ss2, type = 'deviance'), x = ss2$fitted.values)
abline(h = 0)

# should be close to 1
ss2$deviance / ss2$df.residual
## [1] 1.115512
# get sample size
nrow(sizeSeed) # total number of individual fruits
## [1] 19
sum(sizeSeed$NumSeeds) # total number of seeds counted
## [1] 382
Print sesion info
sessionInfo()
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] lsmeans_2.25 estimability_1.2 multcomp_1.4-6
## [4] TH.data_1.0-8 MASS_7.3-45 survival_2.40-1
## [7] mvtnorm_1.0-5 sjPlot_2.2.1 influence.ME_0.9-8
## [10] plyr_1.8.4 lme4_1.1-12 Matrix_1.2-8
## [13] ggplot2_2.2.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.9 stringdist_0.9.4.4 lattice_0.20-34
## [4] tidyr_0.6.1 zoo_1.7-14 lmtest_0.9-34
## [7] assertthat_0.1 rprojroot_1.2 digest_0.6.12
## [10] psych_1.6.12 mime_0.5 R6_2.2.0
## [13] backports_1.0.5 stats4_3.3.2 evaluate_0.10
## [16] coda_0.19-1 lazyeval_0.2.0 minqa_1.2.4
## [19] nloptr_1.0.4 DT_0.2 rmarkdown_1.3
## [22] labeling_0.3 splines_3.3.2 stringr_1.1.0
## [25] foreign_0.8-67 htmlwidgets_0.8 munsell_0.4.3
## [28] shiny_1.0.0 broom_0.4.1 httpuv_1.3.3
## [31] modelr_0.1.0 mnormt_1.5-5 htmltools_0.3.5
## [34] nnet_7.3-12 tibble_1.2 coin_1.1-3
## [37] codetools_0.2-15 dplyr_0.5.0 sjmisc_2.2.1
## [40] grid_3.3.2 nlme_3.1-130 arm_1.9-3
## [43] xtable_1.8-2 gtable_0.2.0 DBI_0.5-1
## [46] magrittr_1.5 scales_0.4.1 stringi_1.1.2
## [49] reshape2_1.4.2 effects_3.1-2 sandwich_2.3-4
## [52] blme_1.0-4 RColorBrewer_1.1-2 tools_3.3.2
## [55] purrr_0.2.2 sjstats_0.7.1 abind_1.4-5
## [58] parallel_3.3.2 yaml_2.1.14 colorspace_1.3-2
## [61] knitr_1.15.1 haven_1.0.0 merTools_0.3.0
## [64] modeltools_0.2-21
# print time of that html document was created
Sys.time()
## [1] "2017-03-11 19:16:17 EST"